Protobothrops mucrosquamatus (Taiwan habu)

dir = list.dirs("agneesh/all_data/kallisto_results/", full.names = T, recursive = F)
filename = file.path(dir, "abundance.tsv")
names <- data.frame(name = filename) %>% separate(name, sep = "//", into = c(NA,"test")) %>% separate(test, sep = "/", into = c("name", NA)) %>% pull(name)

pm <- tibble(target_id = character(), est_counts = numeric(), tpm = numeric(), library = character())

for (j in seq(filename)) pm <- rbind(pm, read_tsv(filename[j]) %>% select(target_id, est_counts, tpm) %>% mutate(library = names[j]))
genes <- read_tsv("ref/genes.txt", col_names = c("target_id", "gene"))
taiwan_venom <- read_rds("data/pm.RDS") %>% filter(grepl("Pm", library)) %>% left_join(genes, by = "target_id") %>% na.omit() %>% 
  group_by(gene) %>% summarize(tpm = sum(tpm))
taiwan_proteome <- read_csv("data/taiwan-secreted.csv", col_types = "-nc", col_names = c("gene", "type"))
rbind(read_tsv("data/1-Taiwan-Female/abundance.tsv") %>% mutate(library = "taiwan1"),
read_tsv("data/11-Taiwan-Female/abundance.tsv") %>% mutate(library = "taiwan11"),
read_tsv("data/2-Taiwan-Female/abundance.tsv") %>% mutate(library = "taiwan2"),
read_tsv("data/3-Taiwan-Male/abundance.tsv") %>% mutate(library = "taiwan3")) %>%
  left_join(genes, by = "target_id") %>% na.omit() %>% 
  group_by(library, gene) %>% summarize(tpm = sum(tpm), est_counts = sum(est_counts)) -> taiwan

averaged_data <- left_join(taiwan %>% filter(est_counts > 3) %>%
                             group_by(gene) %>% summarize(tpm = mean(tpm), est_counts = mean(est_counts)), taiwan_venom %>% group_by(gene) %>% summarize(tpm = mean(tpm)), by = "gene") 
with(averaged_data, cor.test(tpm.x, tpm.y, method = "s"))
## Warning in cor.test.default(tpm.x, tpm.y, method = "s"): Cannot compute exact p-
## value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  tpm.x and tpm.y
## S = 5.9513e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3908262
tp1 <- averaged_data %>% ggplot(aes(tpm.y, tpm.x)) + geom_hex() + scale_y_log10(labels=trans_format('log10',math_format(10^.x))) + scale_x_log10(labels=trans_format('log10',math_format(10^.x))) + theme_bw() + xlab("Venom gland RNA (TPM)") + ylab("Venom RNA (TPM)") + labs(color=expression('Log'[10]*" counts")) + guides(fill = "none")
tp1
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 89 rows containing non-finite values (stat_binhex).

taiwan_merged_venom <- left_join(filter(taiwan, gene %in% taiwan_proteome$gene), taiwan_venom, by = "gene") %>% left_join(taiwan_proteome)

taiwan_merged_venom  %>% group_by(library)  %>% summarize(cor = cor(tpm.x, tpm.y), method = "s")
left_join(filter(taiwan, est_counts >= 1 & ! gene %in% taiwan_proteome$gene), taiwan_venom, by = "gene") %>% group_by(library)  %>% summarize(cor = cor(tpm.x, tpm.y), method = "s")
p1 <- taiwan_merged_venom %>% na.omit() %>% filter(est_counts >= 1) %>% ggplot(aes(tpm.y, tpm.x, shape = library, color = type)) + geom_line(aes(group = gene), color = "grey") + geom_point() + scale_y_log10(labels=trans_format('log10',math_format(10^.x))) +scale_x_log10(labels=trans_format('log10',math_format(10^.x))) + theme_bw() + xlab("Average venom gland gene expression (TPM)") + ylab("Venom mRNA level (TPM)") + guides(shape = "none") + ggtitle("Protobothrops mucrosquamatus") + theme(plot.title = element_text(face = "italic"))
p1
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis

#taiwan_merged_venom %>% na.omit() %>% filter(est_counts >= 1) %>% ggplot(aes(tpm.y, tpm.x, shape = library, color = type)) + geom_line(aes(group = gene), color = "grey") + geom_point() + theme_bw() + xlab("Average venom gland gene expression (TPM)") + ylab("Venom mRNA level (TPM)") + guides(shape = "none") + ggtitle("Protobothrops mucrosquamatus") + theme(plot.title = element_text(face = "italic")) + annotation_custom(grob = ggplotGrob(tp1, xmin = 1, xmax = 2, ymin = 3, ymax = 4))  + scale_y_log10(labels=trans_format('log10',math_format(10^.x))) +scale_x_log10(labels=trans_format('log10',math_format(10^.x)))

#p1_1 <- p1 + annotation_custom(grob = ggplotGrob(tp1 + theme(axis.title=element_blank(),
        #) ), 
        #xmin = .7, xmax = 3, ymin = 2, ymax = 5)

Investigating RNA levels in

taiwan %>% filter(tpm > 1) %>%
  select(-est_counts) %>% 
  mutate(tpm = log(1+tpm)) %>% pivot_wider(names_from =  library, values_from = tpm) %>%
  select(-taiwan2) %>%
  filter(!if_any(everything(), is.na)) %>%
  ggpairs(columns = 2:4)

# Where are venom toxins?

Pm_tpm_exp<-rbind(read_tsv("data/1-Taiwan-Female/abundance.tsv") %>% mutate(id="taiwan1"),
      read_tsv("data/2-Taiwan-Female/abundance.tsv") %>% mutate(id="taiwan2"),
      read_tsv("data/3-Taiwan-Male/abundance.tsv") %>% mutate(id="taiwan3"),
      read_tsv("data/11-Taiwan-Female/abundance.tsv") %>% mutate(id="taiwan11")) %>% 
  dplyr::select(target_id,tpm,id) %>% spread(id,tpm)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   target_id = col_character(),
##   length = col_double(),
##   eff_length = col_double(),
##   est_counts = col_double(),
##   tpm = col_double()
## )
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   target_id = col_character(),
##   length = col_double(),
##   eff_length = col_double(),
##   est_counts = col_double(),
##   tpm = col_double()
## )
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   target_id = col_character(),
##   length = col_double(),
##   eff_length = col_double(),
##   est_counts = col_double(),
##   tpm = col_double()
## )
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   target_id = col_character(),
##   length = col_double(),
##   eff_length = col_double(),
##   est_counts = col_double(),
##   tpm = col_double()
## )
pm<-readRDS("./data/pm.RDS")
pm<-pm %>% dplyr::select(-est_counts) %>%  spread(library,tpm)
pm_tpm<-cbind(Pm_tpm_exp,pm[14:41]) 

dat<-pm_tpm %>% group_by(target_id) %>% 
  summarise(venom_gland=mean(Pm_10:Pm_9),venom_rna = mean(taiwan1:taiwan3)) %>% 
  filter(venom_gland >1,venom_rna>1) #filter to remove low exp genes hugging the axis to improve readability

toxins<-read.csv("./data/toxins.csv")

dat_toxins<-dat %>%filter(target_id %in% toxins$Transcripts) 

dat_toxins %>% ggplot()+
  geom_point(aes(x=target_id, y=venom_rna,color="venom"))+
  geom_line(aes(x=target_id,y=venom_rna,group =1), color="#6B1EE1")+
  geom_point(aes(x=target_id, y=venom_gland,color="gland"))+
  geom_line(aes(x=target_id,y=venom_gland,group=1), color="#94E11E")+
  scale_color_manual(values = c("#94E11E","#6B1EE1"))+
  scale_y_log10()+
  theme_classic()

cor(dat_toxins$venom_gland,dat_toxins$venom_rna, method = 's')
##           [,1]
## [1,] 0.6252115

Alluvial plots

toxins<-read.csv("./data/toxins.csv")
toxins<-rename(toxins, target_id=Transcripts)
allu_dat<-inner_join(dat_toxins,toxins,"target_id") %>% select(venom_gland, venom_rna, short) %>% rename(Toxin=short)
allu_dat<-allu_dat %>% gather(Toxin) %>%  mutate(toxin = rep(allu_dat$Toxin,2)) %>% rename(tissue=Toxin)

toxins<-c("SVMP","SVSP","CTL","PLA2","LAAO","CRISP","VEGF","BPP","NGF")
allu_dat<-allu_dat %>% filter(toxin %in% toxins)

ggplot(allu_dat, aes(y= log(value),
                     axis1 = toxin,
                     axis2 = tissue))+
  geom_alluvium(aes(fill = toxin))+
  geom_stratum()+
   geom_text(stat = "stratum",
            aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("toxin", "tissue"),
                   expand = c(0.15, 0.05))+
  scale_fill_viridis_d()+
  theme_void()

Principal component analysis

taiwan_venom2 <- taiwan_venom <- read_rds("data/pm.RDS") %>% left_join(genes, by = "target_id") %>% na.omit() %>% 
  group_by(gene, library) %>% summarize(tpm = sum(tpm)) 
## `summarise()` has grouped output by 'gene'. You can override using the `.groups` argument.
taiwan_merged2 <- rbind(taiwan %>% filter(tpm >= 2) %>% select(-est_counts), taiwan_venom2) %>% 
  pivot_wider(names_from = library, values_from = tpm) 

d <- taiwan_merged2 %>% ungroup %>% select(-gene,  -P_m_05_heart, -taiwan2) 
d <- d[sort(rowSums(d, na.rm = T), decreasing = T, index.return = T)[[2]][1:5000],]
d <- log(d+1) %>% t() %>% dist()
fit <- MASS::isoMDS(d, k=2)
## initial  value 24.091138 
## final  value 24.072651 
## converged
plotDat <- data.frame(x = fit$points[,1], y = fit$points[,2], tissue = c(rep("venom",3), rep(c("heart", "kidney", "liver"),2), c("kidney", "liver"), c("heart", "kidney", "liver"), rep("venom gland", 28)) ) 

centroids <- aggregate(cbind(x,y) ~ tissue, plotDat, mean)
conf.rgn  <- do.call(rbind, lapply(unique(plotDat$tissue), function(t)
  data.frame(tissue = as.character(t),
             ellipse(cov(plotDat[plotDat$tissue == t, 1:2]),
                     centre=as.matrix(centroids[centroids$tissue == t,2:3]),
                     level=0.95),
             stringsAsFactors=FALSE)))

tp2 <- ggplot(plotDat, aes(x,y,color = tissue)) + geom_point(size=3) + theme_bw() + geom_path(data=conf.rgn) + xlab("NMDS Axis 1") + ylab("NMDS Axis 2")+ theme(legend.position = 'bottom')
tp2

#g <- grid.arrange(tp1, tp2, ncol = 1)
#ggsave(file="plots/Figure 2 taiwan.pdf", g, width = 5, height = 10) 

As expected given the lower coverage the venom RNA nests with the venom gland samples

Module preservation in Taiwan habu data

modules <- read_csv(file = "data/modules.csv")

taiwan_venom_matrix <-  rbind(read_rds("data/pm.RDS") %>% filter(grepl("Pm", library)) %>% select(-est_counts),
      rbind(read_tsv("data/1-Taiwan-Female/abundance.tsv") %>% mutate(library = "taiwan1"),
            read_tsv("data/11-Taiwan-Female/abundance.tsv") %>% mutate(library = "taiwan11"),
            read_tsv("data/3-Taiwan-Male/abundance.tsv") %>% mutate(library = "taiwan3")) %>% 
        filter(est_counts > 1) %>% select(target_id, library, tpm) %>%
        mutate(tpm = log(1+tpm)) %>% select(library, target_id, tpm)) %>% 
        pivot_wider(names_from = target_id, values_from = tpm)

taiwan_venom_matrix2 <- taiwan_venom_matrix[,intersect(modules$target_id, colnames(taiwan_venom_matrix)[colSums(is.na(taiwan_venom_matrix)) == 0])]
gsg <- goodSamplesGenes(taiwan_venom_matrix2)
gsg$allOK

goodModules <- modules %>% filter(target_id %in% colnames(taiwan_venom_matrix2)) %>% group_by(colors) %>% summarize(count = n() > 200) %>% filter(count == T) %>% pull(colors) # filter out small modules

keep <- intersect(modules %>% filter(colors %in% goodModules) %>% pull(target_id), colnames(taiwan_venom_matrix2)) 

setLabels <- c("old", "new");
multiExpr <- list(old = list(data = taiwan_venom_matrix2[1:28, keep]), new = list(data = taiwan_venom_matrix2[28:30, keep]))
multiColor <- list(old = modules %>% filter(target_id %in% keep) %>% pull(colors))

allowWGCNAThreads(10)
mp <- modulePreservation(multiExpr, multiColor,
                         parallelCalculation = T,
                         referenceNetworks = 1,
                         checkData = F,
                         nPermutations = 200,
                         randomSeed = 1,
                         verbose = 3)

mp<-readRDS("./data/module_preservation.rds")
mp$preservation$Z$ref.old$inColumnsAlsoPresentIn.new %>% mutate(p = 10^mp$preservation$log.p$ref.old$inColumnsAlsoPresentIn.new$log.psummary.pres) %>% select (moduleSize, Zsummary.pres, p) 

#write_rds(mp, "data/module_preservation.rds")

The metavenom module is highly preserved in the venom RNA

#Human-habu_modules from orgin of specialisation study
habu_human_modules<-read_csv("./data/all_data_tpmKeep_modules.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   gene_symbol = col_character(),
##   colors = col_character(),
##   ens_gene_human = col_character(),
##   habu_target_id = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
#get muscle module
habu_human_muscle<-habu_human_modules %>% filter(colors == "yellow")

#filter the muscle data in vRNA gRNA matrix
pm_tpm_muscle<-pm_tpm %>% filter(target_id %in% habu_human_muscle$habu_target_id)

dat_muscle<-pm_tpm_muscle %>% group_by(target_id) %>% summarise(venom_rna=mean(taiwan1:taiwan3),venom_gland=mean(Pm_10:Pm_9))

cor(dat_muscle$venom_rna,dat_muscle$venom_gland,method = 's')
##           [,1]
## [1,] 0.4764015
#lower correlation for muscle than venom

dat_muscle %>% ggplot()+
  geom_point(aes(x=target_id, y=venom_rna,color="venom"))+
  geom_point(aes(x=target_id, y=venom_gland,color="gland"))+
  scale_color_manual(values = c("#94E11E","#6B1EE1"))+
  scale_y_log10()+
  ylab("RNA abundance (logTPM)")+
  theme_bw()
## Warning: Transformation introduced infinite values in continuous y-axis

#Most mucle abundance is in vgRNA. Comparatively, vRNA has lower amount of muscle contaminants.
taiwan_venom2 <- taiwan_venom <- read_rds("data/pm.RDS") %>% left_join(genes, by = "target_id") %>% na.omit() %>% 
  group_by(gene, library) %>% summarize(tpm = sum(tpm)) 
## `summarise()` has grouped output by 'gene'. You can override using the `.groups` argument.
taiwan_merged2 <- rbind(taiwan %>% filter(tpm >= 2) %>% select(-est_counts), taiwan_venom2) %>% 
  pivot_wider(names_from = library, values_from = tpm) 

taiwan_merged2<-taiwan_merged2 %>% filter(!gene %in% habu_human_muscle$habu_gene_id)

d <- taiwan_merged2 %>% ungroup %>% select(-gene,  -P_m_05_heart, -taiwan2) 
d <- d[sort(rowSums(d, na.rm = T), decreasing = T, index.return = T)[[2]][1:5000],]
d <- log(d+1) %>% t() %>% dist()
fit <- MASS::isoMDS(d, k=2)
## initial  value 21.796921 
## final  value 21.777990 
## converged
plotDat <- data.frame(x = fit$points[,1], y = fit$points[,2], tissue = c(rep("venom",3), rep(c("heart", "kidney", "liver"),2), c("kidney", "liver"), c("heart", "kidney", "liver"), rep("venom gland", 28)) ) 

centroids <- aggregate(cbind(x,y) ~ tissue, plotDat, mean)
conf.rgn  <- do.call(rbind, lapply(unique(plotDat$tissue), function(t)
  data.frame(tissue = as.character(t),
             ellipse(cov(plotDat[plotDat$tissue == t, 1:2]),
                     centre=as.matrix(centroids[centroids$tissue == t,2:3]),
                     level=0.95),
             stringsAsFactors=FALSE)))

tp2 <- ggplot(plotDat, aes(x,y,color = tissue)) + geom_point(size=3) + theme_bw() + geom_path(data=conf.rgn) + xlab("NMDS Axis 1") + ylab("NMDS Axis 2")+ theme(legend.position = 'bottom')
tp2

pm_tpm %>% group_by(target_id) %>% summarise(vRNA = mean(taiwan1:taiwan3),vgRNA = mean(Pm_10:Pm_9)) %>% filter(vRNA > 2 & vgRNA >2)->model_dat


fit<-lm(log10(model_dat$vgRNA)~log10(model_dat$vRNA))
summary(fit)
## 
## Call:
## lm(formula = log10(model_dat$vgRNA) ~ log10(model_dat$vRNA))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.10589 -0.34591  0.00843  0.32871  2.44065 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.59561    0.01723   34.57   <2e-16 ***
## log10(model_dat$vRNA)  0.51516    0.01056   48.80   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5151 on 5175 degrees of freedom
## Multiple R-squared:  0.3152, Adjusted R-squared:  0.315 
## F-statistic:  2381 on 1 and 5175 DF,  p-value: < 2.2e-16
plot(fit)

ggplot(fit,aes(fit$residuals))+
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

model_dat %>% 
  ggplot(aes(x=log10(vgRNA), y=log10(vRNA)))+
  geom_point()+
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

#model diagnostic plots how that the model has good properties

Protobothrops flavoviridis (Okinawa habu)

okinawa_old <- read_tsv("old/okinawa.txt") # protein data from Aird et al.
rbind(read_tsv("data/7-Okinawa-Male/abundance.tsv") %>% mutate(id = "okinawa7"),
read_tsv("data/8-Okinawa-Female/abundance.tsv") %>% mutate(id = "okinawa8"),
read_tsv("data/9-Okinawa-Male/abundance.tsv") %>% mutate(id = "okinawa9") ) %>% 
  left_join(read_tsv("data/okinawa_ref/abundance.tsv")  %>% mutate(ref = tpm) %>% select(ref, target_id), by = c("target_id")) -> okinawa

okinawa_merged <- left_join(okinawa_old, okinawa, by =c("Pf" = "target_id"))

okinawa_merged %>% group_by(id) %>% filter(est_counts >= 0) %>% summarize(cor = cor(tpm, ref), method = "s")
cor.test(okinawa_old$fpkm, okinawa_old$`frag/len`)
## 
##  Pearson's product-moment correlation
## 
## data:  okinawa_old$fpkm and okinawa_old$`frag/len`
## t = 2.6585, df = 102, p-value = 0.009114
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06516617 0.42625081
## sample estimates:
##       cor 
## 0.2545596
okinawa_merged %>% group_by(id) %>% filter(est_counts >= 0) %>% summarize(cor = cor(tpm, `frag/len`), method = "s")
p2 <- okinawa_merged %>% filter(est_counts >= 1) %>% ggplot(aes(ref, tpm, color = id)) + geom_line(aes(group = Pf), color = "grey")+ geom_point() + scale_y_log10(labels=trans_format('log10',math_format(10^.x))) +scale_x_log10(labels=trans_format('log10',math_format(10^.x))) + theme_bw() + xlab("Venom gland gene expression (TPM)") + ylab("Venom mRNA level (TPM)") + guides(color = F) + ggtitle("Protobothrops flavoviridis") + theme(plot.title = element_text(face = "italic"))
p2
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

pf_toxins<-read_csv("./data/Protobothrops-Table 3.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   ID = col_character(),
##   `Toxin Class` = col_character(),
##   FPKM = col_number(),
##   `% FPKM` = col_character(),
##   Summary = col_character()
## )
pf_venom_toxins<-left_join(okinawa_merged %>% filter(Pf %in% pf_toxins$ID) %>% select(Pf,tpm,id) %>% spread(id,tpm),
                           okinawa_merged %>% select(Pf,ref) %>% group_by(Pf) %>% summarise(ref=mean(ref))) #dataset with 57 toxins (>1 est_count)
## Joining, by = "Pf"
cor(pf_venom_toxins %>% group_by(Pf) %>% summarise(mean_tpm=mean(okinawa7:okinawa9)) %>% select(mean_tpm),pf_venom_toxins$ref, method = 's')
##               [,1]
## mean_tpm 0.7022297
#toxin vRNA and vgRNA highly correlated

dat_toxins<-cbind(pf_venom_toxins$Pf,pf_venom_toxins %>% group_by(Pf) %>% summarise(mean_tpm=mean(okinawa7:okinawa9)) %>% select(mean_tpm),pf_venom_toxins$ref) %>% rename(target_id=`pf_venom_toxins$Pf`,venom_rna=mean_tpm,venom_gland=`pf_venom_toxins$ref`)

dat_toxins<-left_join(dat_toxins,pf_toxins %>% rename(target_id=ID), by='target_id') %>% rename(toxin_class=`Toxin Class`)

dat_toxins %>% ggplot()+
  geom_point(aes(x=toxin_class, y=venom_rna,color="venom"))+
  geom_line(aes(x=toxin_class,y=venom_rna,group =1), color="#6B1EE1")+
  geom_point(aes(x=toxin_class, y=venom_gland,color="gland"))+
  geom_line(aes(x=toxin_class,y=venom_gland,group=1), color="#94E11E")+
  scale_color_manual(values = c("#94E11E","#6B1EE1"))+
  scale_y_log10()+
  ylab("RNA abundance (logTPM)")+
  theme_bw()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

Ovophis okinavensis (Hime habu)

ovophis_old <- read_tsv("old/ovophis.txt") # protein data from Aird et al.
rbind(read_tsv("data/10-Hime-Female/abundance.tsv") %>% mutate(id = "hime10"),
      read_tsv("data/4-Hime-Female/abundance.tsv") %>% mutate(id = "hime4"),
      read_tsv("data/5-Hime-Female/abundance.tsv") %>% mutate(id = "hime5"),
read_tsv("data/6-Hime-Female/abundance.tsv") %>% mutate(id = "hime6") ) %>% 
  left_join(read_tsv("data/ovophis_ref/abundance.tsv")  %>% mutate(ref = tpm) %>% select(ref, target_id), by = c("target_id")) -> ovophis

ovophis_merged <- left_join(ovophis_old, ovophis, by =c("Oo" = "target_id"))

ovophis_merged %>% group_by(id) %>% filter(est_counts > 1) %>% summarize(cor = cor(tpm, ref), method = "s")
cor.test(ovophis_old$fpkm, ovophis_old$`frag/len`)
## 
##  Pearson's product-moment correlation
## 
## data:  ovophis_old$fpkm and ovophis_old$`frag/len`
## t = 3.3867, df = 66, p-value = 0.001196
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1611356 0.5708343
## sample estimates:
##      cor 
## 0.384778
ovophis_merged %>% group_by(id) %>% filter(est_counts > 1) %>% summarize(cor = cor(tpm, `frag/len`), method = "s")
p3 <- ovophis_merged %>% filter(est_counts > 1) %>% ggplot(aes(ref, tpm, color = id))  + geom_line(aes(group = Oo), color = "grey") + geom_point() + scale_y_log10(labels=trans_format('log10',math_format(10^.x))) +scale_x_log10(labels=trans_format('log10',math_format(10^.x))) + theme_bw() + xlab("Venom gland gene expression (TPM)") + ylab("Venom mRNA level (TPM)") + guides(color = F) + ggtitle("Ovophis okinavensis") + theme(plot.title = element_text(face = "italic")) 

p3

p <- list(p1,p2,p3) %>% map(~.x + labs(x=NULL, y=NULL))
grid.arrange(grobs=p, ncol=3, bottom = "Venom gland gene expression (TPM)", left = "Venom mRNA level (TPM)")
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

g <- arrangeGrob(grobs=p, ncol=3, bottom = "Venom gland gene expression (TPM)", left = "Venom mRNA level (TPM)") 
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis
#ggsave(file="plots/Figure 1 TPM.pdf", g, width = 10, height = 5) 
oo_toxins<-read_csv("./data/Ovophis-Table 3.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   ID = col_character(),
##   `Toxin Class` = col_character(),
##   FPKM = col_double(),
##   `% FPKM` = col_character(),
##   Summary = col_character()
## )
oo_venom_toxins<-left_join(ovophis_merged %>% filter(Oo %in% oo_toxins$ID) %>% select(Oo,tpm,id) %>% spread(id,tpm),
                           ovophis_merged %>% select(Oo,ref) %>% group_by(Oo) %>% summarise(ref=mean(ref))) #dataset with 49 toxins (>1 est_count)
## Joining, by = "Oo"
cor(oo_venom_toxins %>% group_by(Oo) %>% summarise(mean_tpm=mean(hime10:hime6)) %>% select(mean_tpm),oo_venom_toxins$ref, method = 's')
##               [,1]
## mean_tpm 0.7313452
#toxin vRNA and vgRNA highly correlated

dat_toxins<-cbind(oo_venom_toxins$Oo,oo_venom_toxins %>% group_by(Oo) %>% summarise(mean_tpm=mean(hime10:hime6)) %>% select(mean_tpm),oo_venom_toxins$ref) %>% rename(target_id=`oo_venom_toxins$Oo`,venom_rna=mean_tpm,venom_gland=`oo_venom_toxins$ref`)

dat_toxins<-left_join(dat_toxins,oo_toxins %>% rename(target_id=ID), by='target_id') %>% rename(toxin_class=`Toxin Class`)

dat_toxins %>% ggplot()+
  geom_point(aes(x=toxin_class, y=venom_rna,color="venom"))+
  geom_line(aes(x=toxin_class,y=venom_rna,group =1), color="#6B1EE1")+
  geom_point(aes(x=toxin_class, y=venom_gland,color="gland"))+
  geom_line(aes(x=toxin_class,y=venom_gland,group=1), color="#94E11E")+
  scale_color_manual(values = c("#94E11E","#6B1EE1"))+
  scale_y_log10()+
  ylab("RNA abundance (logTPM)")+
  theme_bw()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis